luci-base: fix UTF handling in super fast hash function
author Paul Donald <[email protected]>
Sat, 12 Apr 2025 20:04:32 +0000 (22:04 +0200)
committer Paul Donald <[email protected]>
Sat, 12 Apr 2025 20:33:02 +0000 (22:33 +0200)
follow-up fix for 2babc47ae2562cc123ea9048197996a0e3a223b1

The previous code did not handle:
- the 4-byte UTF-8 encoding case
- UTF-16 surrogate pairs

Signed-off-by: Paul Donald <[email protected]>
modules/luci-base/htdocs/luci-static/resources/cbi.js

index d7b462c367e54106c58a4f48af5e016095210ea7..e694a105e417fc7daa21a40fc9d3fdac432b6044 100644 (file)
@@ -32,6 +32,15 @@ function sfh(s) {
        for (var i = 0; i < s.length; i++) {
                var ch = s.charCodeAt(i);
 
+               // Handle surrogate pairs
+               if (ch >= 0xD800 && ch <= 0xDBFF && i + 1 < s.length) {
+                       const next = s.charCodeAt(i + 1);
+                       if (next >= 0xDC00 && next <= 0xDFFF) {
+                               ch = 0x10000 + ((ch - 0xD800) << 10) + (next - 0xDC00);
+                               i++;
+                       }
+               }
+
                if (ch <= 0x7F)
                        bytes.push(ch);
                else if (ch <= 0x7FF)
@@ -41,7 +50,7 @@ function sfh(s) {
                        bytes.push(((ch >>> 12) & 0x0F) | 0xE0,
                                   ((ch >>>  6) & 0x3F) | 0x80,
                                   ( ch         & 0x3F) | 0x80);
-               else if (code <= 0x10FFFF)
+               else if (ch <= 0x10FFFF)
                        bytes.push(((ch >>> 18) & 0x07) | 0xF0,
                                   ((ch >>> 12) & 0x3F) | 0x80,
                                   ((ch >>   6) & 0x3F) | 0x80,
@@ -91,7 +100,7 @@ function sfh(s) {
        hash  = (hash ^ (hash << 25)) >>> 0;
        hash += hash >>> 6;
 
-       return (0x100000000 + hash).toString(16).substr(1);
+       return (0x100000000 + hash).toString(16).slice(1);
 }
 
 var plural_function = null;